InĀ [1]:
%%capture
!pip install google-cloud-bigquery
!pip install db-dtypes
!pip install plotly
InĀ [2]:
import os
from IPython.display import display, HTML
from google.cloud import bigquery
import warnings
warnings.filterwarnings("ignore", message="BigQuery Storage module not found")
bigquery_client = bigquery.Client()
Get some metrics¶
InĀ [3]:
query = """
with
month_limit as (
select
date_sub(
max(date_trunc(date, month)),
interval 10 year
) as ten_years_ago,
date_sub(
max(date_trunc(date, month)),
interval 12 month
) as last_year,
date_sub(
max(date_trunc(date, month)),
interval 1 month
) as previous_month,
max(date_trunc(date, month)) as current_month
from bigquery-public-data.iowa_liquor_sales.sales
)
select
case date_trunc(sales.date, month)
when month_limit.current_month then 'current_month'
when month_limit.previous_month then 'previous_month'
when month_limit.last_year then 'last_year'
when month_limit.ten_years_ago then 'ten_years_ago'
end as month,
count(*) as number_of_sales,
cast(sum(volume_sold_liters) as int64) as volume_sold_in_liters,
cast(sum(sale_dollars) as int64) as revenue,
count(distinct(store_number)) as number_of_selling_stores
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on date_trunc(sales.date, month) in (
month_limit.last_year,
month_limit.previous_month,
month_limit.current_month,
month_limit.ten_years_ago
)
group by
month
"""
metrics_df = bigquery_client.query(query).to_dataframe()
metrics_df.set_index('month', inplace=True)
metrics_df = metrics_df.T
metrics_df.reset_index(inplace=True)
metrics_df = metrics_df.rename(columns={'index': 'metric_name'})
metrics_df['metric_name'] = [' '.join(word.capitalize() for word in k.split('_')) for k in metrics_df['metric_name']]
metrics = metrics_df.to_dict('records')
print(metrics)
[{'metric_name': 'Number Of Sales', 'last_year': 203408, 'previous_month': 200509, 'current_month': 176352, 'ten_years_ago': 158636}, {'metric_name': 'Volume Sold In Liters', 'last_year': 1833718, 'previous_month': 1628652, 'current_month': 1648290, 'ten_years_ago': 1494081}, {'metric_name': 'Revenue', 'last_year': 35101475, 'previous_month': 31111678, 'current_month': 31308696, 'ten_years_ago': 21224352}, {'metric_name': 'Number Of Selling Stores', 'last_year': 1916, 'previous_month': 1962, 'current_month': 1927, 'ten_years_ago': 1209}]
Define the html to display the metrics¶
InĀ [4]:
from IPython.display import display, HTML
import jinja2
def get_metric_card(metric_name, metric_period):
for metric_dict in metrics:
if metric_dict['metric_name'] == metric_name:
break
metric_dict['current_month_str'] = "{:,.0f}".format(metric_dict['current_month'])
if metric_dict['metric_name'] == 'Revenue':
metric_dict['current_month_str'] = '$' + metric_dict['current_month_str']
for k in ['previous_month', 'last_year', 'ten_years_ago']:
metric_dict[f'{k}_diff'] = (metric_dict['current_month'] - metric_dict[k])/metric_dict[k]
metric_dict[f'{k}_diff_str'] = "{:.1%}".format(metric_dict[f'{k}_diff'])
if metric_dict[f'{k}_diff'] > 0:
metric_dict[f'{k}_diff_str'] = '+' + metric_dict[f'{k}_diff_str']
metric_dict[f'{k}_color'] = 'green' if metric_dict[f'{k}_diff'] >=0 else 'red'
metric_dict['metric_period'] = metric_period
template_html = jinja2.Template("""
<div style="display: flex; justify-content: center; align-items: center;">
<div style="text-align: center; margin: auto;">
<h3 style="margin: 0">{{ metric_period }}</h3>
<h1 style="margin: 5px">{{ metric_name }}</h1>
<h2 style="margin: 10px">{{ current_month_str }}</h2>
<hr style="margin: 0">
<span style="margin: 0; color: {{ previous_month_color }};">{{ previous_month_diff_str }}</span>
<div>vs last month</div>
<hr style="margin: 0">
<span style="margin: 0; color: {{ last_year_color }};">{{ last_year_diff_str }}</span>
<div>vs last year</div>
<hr style="margin: 0">
<span style="margin: 0; color: {{ ten_years_ago_color }};">{{ ten_years_ago_diff_str }}</span>
<div>vs 10 years ago</div>
</div>
</div>
""")
html = template_html.render(**metric_dict)
display(HTML(html))
Get the latest month with data¶
InĀ [5]:
query = """
select
format_date('%B %Y', max(date_trunc(date, month))) as lattest_month
from bigquery-public-data.iowa_liquor_sales.sales
"""
metric_period = bigquery_client.query(query).to_dataframe()['lattest_month'][0]
InĀ [6]:
get_metric_card('Volume Sold In Liters', metric_period)
February 2025
Volume Sold In Liters
1,648,290
+1.2%
vs last month
-10.1%
vs last year
+10.3%
vs 10 years ago
InĀ [7]:
get_metric_card('Number Of Sales', metric_period)
February 2025
Number Of Sales
176,352
-12.0%
vs last month
-13.3%
vs last year
+11.2%
vs 10 years ago
InĀ [8]:
get_metric_card('Revenue', metric_period)
February 2025
Revenue
$31,308,696
+0.6%
vs last month
-10.8%
vs last year
+47.5%
vs 10 years ago
InĀ [9]:
get_metric_card('Number Of Selling Stores', metric_period)
February 2025
Number Of Selling Stores
1,927
-1.8%
vs last month
+0.6%
vs last year
+59.4%
vs 10 years ago
Yearly Sales¶
InĀ [10]:
query = """
with
year_limit as (
select
max(extract(year from date)) as last_year
from bigquery-public-data.iowa_liquor_sales.sales
)
select
extract(year from date) as year,
count(*) as nb_of_sales,
sum(sale_dollars) as revenue
from bigquery-public-data.iowa_liquor_sales.sales
where
extract(year from date) < (select last_year from year_limit)
group by
year
order by
year
"""
yearly_sales_df = bigquery_client.query(query).to_dataframe()
InĀ [11]:
import plotly.express as px
from plotly.subplots import make_subplots
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
px.line(yearly_sales_df, x="year", y="nb_of_sales", title='Yearly Sales').data[0],
secondary_y=False,
)
revenue_trace = px.line(yearly_sales_df, x="year", y="revenue", title='Yearly Sales').data[0]
revenue_trace.update(line=dict(color="red"))
fig.add_trace(revenue_trace, secondary_y=True)
fig.update_yaxes(title_text="Number of Sales", secondary_y=False)
fig.update_yaxes(title_text="Revenue", secondary_y=True)
fig.update_layout(
title={
'text': '<b>Yearly Sales with Revenue</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
Monthly Sales¶
InĀ [12]:
query = """
with
month_limit as (
select
date_sub(
max(date_trunc(date, month)),
interval 11 month
) as start_month,
from bigquery-public-data.iowa_liquor_sales.sales
)
select
date_trunc(sales.date, month) as month,
count(*) as nb_of_sales,
sum(sale_dollars) as revenue
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on date_trunc(sales.date, month) >= month_limit.start_month
group by
month
order by
month
"""
monthly_sales_df = bigquery_client.query(query).to_dataframe()
InĀ [13]:
import plotly.express as px
from plotly.subplots import make_subplots
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
px.line(monthly_sales_df, x="month", y="nb_of_sales", title='Monthly Sales').data[0],
secondary_y=False,
)
revenue_trace = px.line(monthly_sales_df, x="month", y="revenue", title='Monthly Revenue').data[0]
revenue_trace.update(line=dict(color="red")) # Set line color to red
fig.add_trace(revenue_trace, secondary_y=True)
fig.update_yaxes(title_text="Number of Sales", secondary_y=False)
fig.update_yaxes(title_text="Revenue", secondary_y=True)
fig.update_layout(
title={
'text': '<b>Monthly Sales with Revenue</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
Volume Sold In Liters Per County¶
InĀ [14]:
import plotly.express as px
import json
query = """
with
month_limit as (
select
max(date_trunc(date, month)) as last_month
from bigquery-public-data.iowa_liquor_sales.sales
)
select
county_fips_code,
county_name,
sum(volume_sold_liters) as volume_sold_liters
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on month_limit.last_month = date_trunc(sales.date, month)
inner join bigquery-public-data.geo_us_boundaries.counties
on st_contains(counties.county_geom, sales.store_location)
group by
county_fips_code,
county_name
"""
county_volume_df = bigquery_client.query(query).to_dataframe()
with open("./geojsons/geojson-counties-fips.json") as f:
counties = json.load(f)
target_states = ['19']
counties['features'] = [f for f in counties['features'] if f['properties']['STATE'] in target_states]
fig = px.choropleth(county_volume_df, geojson=counties, locations='county_fips_code', color='volume_sold_liters',
color_continuous_scale='Viridis_r', # Reversed color scale
range_color=(min(county_volume_df['volume_sold_liters']), max(county_volume_df['volume_sold_liters'])),
scope='usa',
hover_data=['county_name', 'county_fips_code', 'volume_sold_liters'], # Specify additional data for tooltip
labels={'volume_sold_liters': 'Volume Sold Liters', 'county_name': 'County', 'county_fips_code': 'FIPS'} # Corrected labels
)
fig.update_layout(margin={'r': 50, 't': 30, 'l': 50, 'b': 50})
fig.update_geos(
center=dict(lon=-93.5, lat=41.9), # Coordinates for Iowa
projection_scale=5
)
fig.update_layout(
title={
'text': f'<b>Volume Sold In Liters Per County ({metric_period})</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
Types of drink sold¶
InĀ [15]:
import plotly.graph_objects as go
query = """
with
month_limit as (
select
max(date_trunc(date, month)) as last_month
from bigquery-public-data.iowa_liquor_sales.sales
),
last_month_sales_with_types as (
select
sales.*,
case
when lower(category_name) like '%whiskies%' then 'Whiskey'
when lower(category_name) like '%whiskey%' then 'Whiskey'
when lower(category_name) like '%rum%' then 'Rum'
when lower(category_name) like '%vodka%' then 'Vodka'
when lower(category_name) like '%tequila%' then 'Tequila'
when lower(category_name) like '%gin%' then 'Gin'
when lower(category_name) like '%shnapps' then 'Shnapps'
when lower(category_name) like '%shnapps' then 'Shnapps'
when lower(category_name) like '%brandies%' then 'Brandy'
when lower(category_name) like '%brandy%' then 'Brandy'
else 'Others'
end as drink_type
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on date_trunc(sales.date, month) = month_limit.last_month
)
select
drink_type,
sum(volume_sold_liters) as volume_sold_liters
from last_month_sales_with_types
group by
drink_type
"""
last_month_drink_types_df = bigquery_client.query(query).to_dataframe()
InĀ [16]:
import plotly.graph_objects as go
fig = go.Figure(data=[go.Pie(labels=last_month_drink_types_df['drink_type'],
values=last_month_drink_types_df['volume_sold_liters'],
textinfo='label+percent',
insidetextorientation='radial',
showlegend=False
)])
fig.update_layout(
title={
'text': f'<b>Types of Drinks Sold ({metric_period})</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
xaxis_title='Month',
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
Top Vendors¶
InĀ [17]:
query = """
with
month_limit as (
select
date_sub(
max(date_trunc(date, month)),
interval 11 month
) as first_month,
max(date_trunc(date, month)) as last_month
from bigquery-public-data.iowa_liquor_sales.sales
),
top_vendors as (
select
vendor_name,
sum(sale_dollars) as revenue
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on date_trunc(sales.date, month) = month_limit.last_month
group by vendor_name
order by revenue desc
limit 3
)
select
sales.vendor_name,
date_trunc(sales.date, month) as month,
sum(sale_dollars) as revenue
from bigquery-public-data.iowa_liquor_sales.sales
inner join top_vendors
on top_vendors.vendor_name = sales.vendor_name
inner join month_limit
on month_limit.first_month <= date_trunc(sales.date, month)
group by
sales.vendor_name,
month
order by
month,
vendor_name
"""
top_vendors_df = bigquery_client.query(query).to_dataframe()
InĀ [18]:
import plotly.graph_objects as go
import numpy as np
top_vendors = top_vendors_df['vendor_name'].unique()
x = np.array(top_vendors_df['month'].unique())
title = 'Main Source for News'
labels = top_vendors
colors = ['rgb(67,67,67)', 'rgb(115,115,115)', 'rgb(49,130,189)']
mode_size = [8, 8, 12]
line_size = [2, 2, 4]
fig = go.Figure()
for i in range(0, len(top_vendors)):
current_vendor = top_vendors[i]
y = top_vendors_df[top_vendors_df['vendor_name'] == current_vendor]['revenue']
fig.add_trace(go.Scatter(x=x, y=y, mode='lines',
name=labels[i],
line=dict(color=colors[i], width=line_size[i]),
connectgaps=True,
))
fig.update_layout(
title={
'text': '<b>Top Vendors</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
Daily Volume Sold Distribution¶
InĀ [19]:
query = """
with
month_limit as (
select
date_sub(
max(date_trunc(date, month)),
interval 11 month
) as first_month
from bigquery-public-data.iowa_liquor_sales.sales
)
select
date,
sum(volume_sold_liters) as volume_sold_liters
from bigquery-public-data.iowa_liquor_sales.sales
inner join month_limit
on month_limit.first_month <= date_trunc(sales.date, month)
group by
date
"""
daily_liters_df = bigquery_client.query(query).to_dataframe()
daily_liters_df
Out[19]:
| date | volume_sold_liters | |
|---|---|---|
| 0 | 2024-06-11 | 84209.26 |
| 1 | 2024-08-28 | 93741.79 |
| 2 | 2024-08-29 | 100172.53 |
| 3 | 2025-01-28 | 62787.34 |
| 4 | 2024-09-24 | 84851.94 |
| ... | ... | ... |
| 310 | 2024-08-22 | 70282.41 |
| 311 | 2025-02-19 | 81866.15 |
| 312 | 2024-03-27 | 86072.00 |
| 313 | 2024-03-13 | 88884.68 |
| 314 | 2024-06-13 | 79710.97 |
315 rows Ć 2 columns
InĀ [20]:
import plotly.express as px
fig = px.histogram(daily_liters_df, x="volume_sold_liters",
marginal="box",
hover_data=daily_liters_df.columns)
fig.update_layout(
title={
'text': '<b>Daily Volume Sold Distribution For The Past 12 Months</b>',
'x':0.5,
'font': {'size': 25, 'family': 'Arial'}
},
xaxis_title='Volume Sold in Liters',
yaxis_title='Count of Days',
margin={'r': 50, 't': 50, 'l': 50, 'b': 50}
)
fig.show()
InĀ [21]:
from datetime import datetime
utc_timestamp = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
print(f"Updated at: {utc_timestamp}")
Updated at: 2025-03-09 02:04:16